{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Concatenating multiple feature extraction methods\n\n\nIn many real-world examples, there are many ways to extract features from a\ndataset. Often it is beneficial to combine several methods to obtain good\nperformance. This example shows how to use ``FeatureUnion`` to combine\nfeatures obtained by PCA and univariate selection.\n\nCombining features using this transformer has the benefit that it allows\ncross validation and grid searches over the whole process.\n\nThe combination used in this example is not particularly helpful on this\ndataset and is only used to illustrate the usage of FeatureUnion.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Andreas Mueller <amueller@ais.uni-bonn.de>\n#\n# License: BSD 3 clause\n\nfrom sklearn.pipeline import Pipeline, FeatureUnion\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.svm import SVC\nfrom sklearn.datasets import load_iris\nfrom sklearn.decomposition import PCA\nfrom sklearn.feature_selection import SelectKBest\n\niris = load_iris()\n\nX, y = iris.data, iris.target\n\n# This dataset is way too high-dimensional. Better do PCA:\npca = PCA(n_components=2)\n\n# Maybe some original features where good, too?\nselection = SelectKBest(k=1)\n\n# Build estimator from PCA and Univariate selection:\n\ncombined_features = FeatureUnion([(\"pca\", pca), (\"univ_select\", selection)])\n\n# Use combined features to transform dataset:\nX_features = combined_features.fit(X, y).transform(X)\nprint(\"Combined space has\", X_features.shape[1], \"features\")\n\nsvm = SVC(kernel=\"linear\")\n\n# Do grid search over k, n_components and C:\n\npipeline = Pipeline([(\"features\", combined_features), (\"svm\", svm)])\n\nparam_grid = dict(features__pca__n_components=[1, 2, 3],\n                  features__univ_select__k=[1, 2],\n                  svm__C=[0.1, 1, 10])\n\ngrid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)\ngrid_search.fit(X, y)\nprint(grid_search.best_estimator_)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}